This document begins the analysis of the STOC data. It loads in the data, demultiplexes using the hastagging barcodes, calls and removes doublets using a combination of doubletFinder and cell hashtagging, and performs initial processing using both gene expression data and surface protein data.
# Set theme
ggplot2::theme_set(ggplot2::theme_classic(base_size = 10))
normalization_method <- "log" # can be SCT or log
if(normalization_method == "SCT"){
SCT <- TRUE
seurat_assay <- "SCT"
} else {
SCT <- FALSE
seurat_assay <- "RNA"
}
# Set this after running first steps of finding doublets
pK <- 0.04
first_run <- FALSEbase_dir <- "/Users/wellskr/Documents/Analysis/Holger_Russ/mtec_organoid_multi/"
save_dir <- paste0(base_dir, "results/R_analysis/")
data_path <- paste0(base_dir,
"results/Hashtag_STOC/outs/count/filtered_feature_bc_matrix/")
sample <- "Hashtag_STOC"
# Information for cell mapping
ref_dir <-
"/Users/wellskr/Documents/Analysis/Holger_Russ/mtec_organoid_multi/files/thymus_annotated_matrix_files/"
ref_mat <- read.csv(paste0(ref_dir, "average_cluster_expression.csv"),
header = TRUE, row.names = 1)stoc_data <- create_seurat_object(sample = sample,
count_path = paste0(base_dir, "results"),
ADT = TRUE, hashtag = TRUE
)First we look at the violin plots for the counts of features, reads, surface protein, and mitochondiral percent. Overall, these look good. I will set cutoffs to 10% mitochondrial reads, between 200 and 6,000 RNA features and less than 10,000 ADTs based on these plots.
# Mitochondrial percent
stoc_data[["percent.mt"]] <- PercentageFeatureSet(stoc_data,
pattern = "^MT-")
# Plot to determine appropriate cutoffs
rna_qual <- VlnPlot(stoc_data,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3)
adt_qual <- VlnPlot(stoc_data, features = c("nCount_ADT", "nFeature_ADT"))
plot_grid(rna_qual, adt_qual,
nrow = 2, ncol = 1,
align = "hv",
axis = "tb")# Remove outliers
stoc_data <- subset(x = stoc_data, subset = percent.mt < 10 &
nFeature_RNA > 200 & nFeature_RNA < 6000 & nCount_ADT < 10000)
# Single Cell Transform normalization
stoc_data <- SCTransform(stoc_data, vars.to.regress = "percent.mt",
verbose = FALSE)
# Default normalization
DefaultAssay(stoc_data) <- "RNA"
stoc_data <- NormalizeData(stoc_data) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData()We can perform PCA in multiple ways with this data set. We can use either the gene expression data or the surface protein data. Both are shown below.
This is PCA performed on the surface protein data. A. the top genes associated with PC1 and PC2 B. PCA colored by processing group C. PCA colored by percent mt D. PCA colored by the number of features E. PCA colored by the number of UMIs F. PCA colored by demultiplexed sample
stoc_data <- PCA_dimRed(stoc_data, assay = seurat_assay)
RNA_plots <- plot_PCA(HTO = TRUE, assay = seurat_assay,
sample_object = stoc_data)plot_grid(RNA_plots$pca_plot[[1]], RNA_plots$mito_plot[[1]],
RNA_plots$nfeature_plot[[1]], RNA_plots$ncount_plot[[1]],
nrow = 2, ncol = 2,
labels = c("B", "C", "D", "E"),
align = "vh",
axis = "tb") We can use the elbow and jackstraw plots below to determine the ideal number of PCs to use for the next steps.
if(SCT){
plot_grid(RNA_plots$elbow,
labels = c("A"))
} else {
plot_grid(RNA_plots$jackstraw, RNA_plots$elbow,
labels = c("A", "B"),
align = "h",
axis = "tb")
}This is PCA performed on the surface protein data. A. the top genes associated with PC1 and PC2 B. PCA colored by processing group C. PCA colored by percent mt D. PCA colored by the number of features E. PCA colored by the number of UMIs F. PCA colored by demultiplexed sample
stoc_data <- PCA_dimRed(stoc_data, assay = "ADT")
ADT_plots <- plot_PCA(HTO = TRUE, assay = "ADT", sample_object = stoc_data)plot_grid(ADT_plots$pca_plot[[1]], ADT_plots$mito_plot[[1]],
ADT_plots$nfeature_plot[[1]], ADT_plots$ncount_plot[[1]],
nrow = 2, ncol = 2,
labels = c("B", "C", "D", "E"),
align = "vh",
axis = "tb") We can use the elbow plot below to determine the ideal number of PCs to use for the next steps.
Like the PCA, we can perform UMAP using the features from the gene expression or the surface protein. We can also use a method to combine both surface protein and gene expression to inform the UMAP.
UMAP based on gene expression alone A. UMAP colored by clusters based on gene expression B. UMAP colored by processing group C. UMAP colored by demultiplexed sample
umap_data <- group_cells(stoc_data, "STOC1", save_dir, nPCs = 27,
resolution = 0.6, assay = seurat_assay, HTO = TRUE)
stoc_data <- umap_data$object
stoc_plots <- umap_data$plots
plot_grid(stoc_plots[[1]], stoc_plots[[2]],
labels = c("A", "B"),
align = "h",
axis = "tb")UMAP based on surface protein alone A. UMAP colored by clusters based on gene expression B. UMAP colored by processing group C. UMAP colored by demultiplexed sample
umap_data <- group_cells(stoc_data, "STOC1", save_dir, nPCs = 16,
resolution = 0.6, assay = "ADT", HTO = TRUE)
stoc_data <- umap_data$object
stoc_plots <- umap_data$plots
plot_grid(stoc_plots[[1]], stoc_plots[[2]],
labels = c("A", "B"),
align = "h",
axis = "tb")UMAP based on both gene expression and surface protein A. UMAP colored by clusters based on gene expression B. UMAP colored by processing group C. UMAP colored by demultiplexed sample
# Identify multimodal neighbors. These will be stored in the neighbors slot,
# and can be accessed using bm[['weighted.nn']]
# The WNN graph can be accessed at bm[["wknn"]],
# and the SNN graph used for clustering at bm[["wsnn"]]
# Cell-specific modality weights can be accessed at bm$RNA.weight
if(SCT){
pca_slot <- "sctpca"
weight_name <- "SCT.weight"
} else{
pca_slot <- "pca"
weight_name <- "RNA.weight"
}
stoc_data <- FindMultiModalNeighbors(
stoc_data, reduction.list = list(pca_slot, "apca"),
dims.list = list(1:27, 1:16),
modality.weight.name = c(weight_name, "ADT.weight")
)stoc_data <- RunUMAP(stoc_data, nn.name = "weighted.nn",
reduction.name = "wnn.umap", reduction.key = "wnnUMAP_")
stoc_data <- FindClusters(stoc_data, graph.name = "wsnn",
algorithm = 3, resolution = 0.6, verbose = FALSE)
stoc_data[["combined_cluster"]] <- Idents(stoc_data)
col_by_list <- c("combined_cluster", "orig.ident")
if(HTO){
col_by_list <- c(col_by_list, "HTO_classification")
}
save_plot <- paste0(save_dir, "images/combined_umap.pdf")
plot_list <- plotDimRed(sample_object = stoc_data,
save_plot = NULL,
col_by = col_by_list, return_plot = TRUE,
plot_type = "wnn.umap")
plot_grid(plot_list[[1]], plot_list[[2]],
labels = c("A", "B"),
align = "h",
axis = "tb")We can map clusters based on existing datasets to provide some unbiased clues about cell type. Here, I used data from the human cell atlas of human thymic development: “A cell atlas of human thymic development defines T cell repertoire formation” DOI: 10.1126/science.aay3224 as the reference. The mapping isn’t perfect, but it will be helpful in addition to looking at marker genes. One note is that this doesn’t map to any T cell populations.
# get count matrix
DefaultAssay(stoc_data) <- seurat_assay
stoc_mat <- GetAssayData(object = stoc_data, slot = "data")
stoc_mat <- as.data.frame(stoc_mat)
# Find only 1000 variable features
stoc_var <- FindVariableFeatures(
stoc_data,
assay = "RNA",
selection.method = "vst",
nfeatures = 1000
)
stoc_genes <- VariableFeatures(stoc_var)
ref_mat <- ref_mat[rownames(ref_mat) %in% rownames(stoc_data),]Here are UMAPs colored by the cell type based on correlations with the RNA clusters. For comparison between the different clustering methods, I’ve included UMAPs based on the RNA data (A), the surface protein data (B) and the combined data(C). A. Predicted cell types projected onto the UMAP made using RNA data alone B. Predicted cell types projected onto the UMAP made using Surface protein data alone C. Predicted cell types projected onto the UMAP made using both the RNA and surface protein data
if(SCT){
Idents(stoc_data) <- "SCT_cluster"
clusters = "SCT_cluster"
} else{
Idents(stoc_data) <- "RNA_cluster"
clusters = "RNA_cluster"
}
stoc_metadata <- stoc_data[[clusters]]
stoc_metadata[[clusters]] <- as.character(stoc_metadata[[clusters]])
stoc_data[[clusters]] <- stoc_metadata[[clusters]]
# Run clustify
stoc_res <- clustify(
input = stoc_mat,
metadata = stoc_metadata,
ref_mat = ref_mat,
query_genes = stoc_genes,
cluster_col = clusters
)
stoc_cluster <- cor_to_call(stoc_res)
new_clusters <- stoc_cluster$type
names(new_clusters) <- stoc_cluster$cluster
print(stoc_cluster)# A tibble: 9 x 3
# Groups: cluster [9]
cluster type r
<chr> <chr> <dbl>
1 0 DC2 0.636
2 1 DC2 0.667
3 2 DC2 0.611
4 4 DC2 0.632
5 8 Fb_cycling 0.747
6 3 Mast 0.569
7 6 Mast 0.671
8 7 Mast 0.561
9 5 mTEC.III. 0.671
stoc_data$RNA_celltype <- new_clusters[stoc_data$RNA_cluster]
#colors <- RColorBrewer::brewer.pal(4, "Set1")
#colors <- lacroix_palette("PassionFruit", 4, "discrete")
# Apricot
# Coconut top
# PeachPear
# PassionFruit top
# Tangerine
# PommeBaya
# PinaFraise
# MelonPomelo
colors <- lacroix_palette("Coconut", 6, "discrete")
colors <- colors[c(1,2,3,5)]
names(colors) <- c("DC2", "Fb_cycling", "Mast", "mTEC.III.")
plot1 <- plotDimRed(stoc_data, col_by = "RNA_celltype", plot_type = "rna.umap",
color = colors)[[1]]
plot2 <- plotDimRed(stoc_data, col_by = "RNA_celltype", plot_type = "adt.umap",
color = colors)[[1]]
plot3 <- plotDimRed(stoc_data, col_by = "RNA_celltype", plot_type = "wnn.umap",
color = colors)[[1]]
plot_grid(plot1, plot2, plot3,
labels = c("A", "B", "C"),
nrow = 1,
ncol = 3,
align = "h",
axis = "tb"
)Here are UMAPs colored by the cell type based on correlations with the surface protein clusters. For comparison between the different clustering methods, I’ve included UMAPs based on the RNA data (A), the surface protein data (B) and the combined data(C). A. Predicted cell types projected onto the UMAP made using RNA data alone B. Predicted cell types projected onto the UMAP made using Surface protein data alone C. Predicted cell types projected onto the UMAP made using both the RNA and surface protein data
clusters <- "ADT_cluster"
stoc_metadata <- stoc_data[[clusters]]
stoc_metadata[[clusters]] <- as.character(stoc_metadata[[clusters]])
stoc_data[[clusters]] <- stoc_metadata[[clusters]]
# Run clustify
stoc_res <- clustify(
input = stoc_mat,
metadata = stoc_metadata,
ref_mat = ref_mat,
query_genes = stoc_genes,
cluster_col = clusters
)
stoc_cluster <- cor_to_call(stoc_res)
new_clusters <- stoc_cluster$type
names(new_clusters) <- stoc_cluster$cluster
print(stoc_cluster)# A tibble: 8 x 3
# Groups: cluster [8]
cluster type r
<chr> <chr> <dbl>
1 0 DC2 0.698
2 1 DC2 0.625
3 2 DC2 0.584
4 4 DC2 0.640
5 7 DC2 0.506
6 3 Mast 0.669
7 5 Mast 0.621
8 6 mTEC.III. 0.606
stoc_data$ADT_celltype <- new_clusters[stoc_data$ADT_cluster]
plot1 <- plotDimRed(stoc_data, col_by = "ADT_celltype", plot_type = "rna.umap",
color = colors)[[1]]
plot2 <- plotDimRed(stoc_data, col_by = "ADT_celltype", plot_type = "adt.umap",
color = colors)[[1]]
plot3 <- plotDimRed(stoc_data, col_by = "ADT_celltype", plot_type = "wnn.umap",
color = colors)[[1]]
plot_grid(plot1, plot2, plot3,
labels = c("A", "B", "C"),
nrow = 1,
ncol = 3,
align = "h",
axis = "tb"
)Here are UMAPs colored by the cell type based on correlations with the combined clusters. For comparison between the different clustering methods, I’ve included UMAPs based on the RNA data (A), the surface protein data (B) and the combined data(C). A. Predicted cell types projected onto the UMAP made using RNA data alone B. Predicted cell types projected onto the UMAP made using Surface protein data alone C. Predicted cell types projected onto the UMAP made using both the RNA and surface protein data D. Violin plot of the the RNA weight for the combined clusters E. Violin plot of the ADT weight for the combined clusters F. Violin plot of the RNA weight for the combined cell type G. Violin plot of the ADT weight for the combined cell type
clusters <- "combined_cluster"
stoc_metadata <- stoc_data[[clusters]]
stoc_metadata[[clusters]] <- as.character(stoc_metadata[[clusters]])
stoc_data[[clusters]] <- stoc_metadata[[clusters]]
# Run clustify
stoc_res <- clustify(
input = stoc_mat,
metadata = stoc_metadata,
ref_mat = ref_mat,
query_genes = stoc_genes,
cluster_col = clusters
)
stoc_cluster <- cor_to_call(stoc_res)
new_clusters <- stoc_cluster$type
names(new_clusters) <- stoc_cluster$cluster
print(stoc_cluster)# A tibble: 9 x 3
# Groups: cluster [9]
cluster type r
<chr> <chr> <dbl>
1 0 DC2 0.672
2 1 DC2 0.636
3 3 DC2 0.630
4 4 DC2 0.629
5 7 Fb_cycling 0.718
6 2 Mast 0.592
7 6 Mast 0.648
8 8 Mast 0.578
9 5 mTEC.III. 0.687
stoc_data$combined_celltype <- new_clusters[stoc_data$combined_cluster]
plot1 <- plotDimRed(stoc_data, col_by = "combined_celltype", plot_type = "rna.umap",
color = colors)[[1]]
plot2 <- plotDimRed(stoc_data, col_by = "combined_celltype", plot_type = "adt.umap",
color = colors)[[1]]
plot3 <- plotDimRed(stoc_data, col_by = "combined_celltype", plot_type = "wnn.umap",
color = colors)[[1]]
plot_grid(plot1, plot2, plot3,
labels = c("A", "B", "C"),
nrow = 1,
ncol = 3,
align = "h",
axis = "tb"
)violin1 <- featDistPlot(stoc_data, "RNA.weight",
sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, "ADT.weight",
sep_by = "combined_cluster")
violin3 <- featDistPlot(stoc_data, "RNA.weight",
sep_by = "combined_celltype", color = colors)
violin4 <- featDistPlot(stoc_data, "ADT.weight",
sep_by = "combined_celltype", color = colors)
plot_grid(violin1, violin2, violin3, violin4,
nrow = 2, ncol = 2, align = "hv", axis = "tb",
labels = c("D", "E", "F", "G"))Because all methods returned almost identical “cell types” I will just do cell type marker genes based on the combined UMAP and clusters. A. RNA markers B. ADT markers
Idents(stoc_data) <- "combined_celltype"
#marker_genes_rna <- FindAllMarkers(stoc_data, assay = seurat_assay)
#write.csv(marker_genes_rna, file = paste0(save_dir,
# "files/rna_markes_combined_celltype.csv"))
marker_genes_rna <- read.csv(paste0(save_dir,
"files/rna_markes_combined_celltype.csv"),
row.names = 1)
#marker_genes_adt <- FindAllMarkers(stoc_data, assay = "ADT")
#write.csv(marker_genes_adt, file = paste0(save_dir, "files/adt_markes_combined_celltype.csv"))
marker_genes_adt <- read.csv(paste0(save_dir,
"files/adt_markes_combined_celltype.csv"),
row.names = 1)
DefaultAssay(stoc_data) <- seurat_assay
top10_rna <- marker_genes_rna %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC)
rna_heatmap <- DoHeatmap(stoc_data, features = top10_rna$gene) + NoLegend()
DefaultAssay(stoc_data) <- "ADT"
top10_adt <- marker_genes_adt %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC)
adt_heatmap <- DoHeatmap(stoc_data, features = top10_adt$gene) + NoLegend()
plot_grid(rna_heatmap, adt_heatmap,
labels = c("A", "B"),
nrow = 2, ncol = 1,
align = "hv",
axis = "tb")Here are ADT and gene expression markers from the diminsionality reduction using genes alone A. RNA markers B. ADT markers
if(SCT){
Idents(stoc_data) <- "SCT_cluster"
} else{
Idents(stoc_data) <- "RNA_cluster"
}
#marker_genes_rna <- FindAllMarkers(stoc_data, assay = seurat_assay)
#write.csv(marker_genes_rna, file = paste0(save_dir,
# "files/rna_markes_gene_clusters.csv"))
marker_genes_rna <- read.csv(paste0(save_dir,
"files/rna_markes_gene_clusters.csv"),
row.names = 1)
#marker_genes_adt <- FindAllMarkers(stoc_data, assay = "ADT")
#write.csv(marker_genes_adt, file = paste0(save_dir,
# "files/adt_markes_gene_clusters.csv"))
marker_genes_adt <- read.csv(paste0(save_dir,
"files/adt_markes_gene_clusters.csv"),
row.names = 1)
DefaultAssay(stoc_data) <- seurat_assay
top10_rna <- marker_genes_rna %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC)
rna_heatmap <- DoHeatmap(stoc_data, features = top10_rna$gene) + NoLegend()
DefaultAssay(stoc_data) <- "ADT"
top10_adt <- marker_genes_adt %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC)
adt_heatmap <- DoHeatmap(stoc_data, features = top10_adt$gene) + NoLegend()
plot_grid(rna_heatmap, adt_heatmap,
labels = c("A", "B"),
nrow = 2, ncol = 1,
align = "hv",
axis = "tb")Here are ADT and gene expression markers from the diminsionality reduction using surface protein alone A. RNA markers B. ADT markers
Idents(stoc_data) <- "ADT_cluster"
#marker_genes_rna <- FindAllMarkers(stoc_data, assay = seurat_assay)
#write.csv(marker_genes_rna, file = paste0(save_dir,
# "files/rna_markes_adt_clusters.csv"))
marker_genes_rna <- read.csv(paste0(save_dir,
"files/rna_markes_adt_clusters.csv"),
row.names = 1)
#marker_genes_adt <- FindAllMarkers(stoc_data, assay = "ADT")
#write.csv(marker_genes_adt, file = paste0(save_dir,
# "files/adt_markes_rna_clusters.csv"))
marker_genes_adt <- read.csv(paste0(save_dir,
"files/adt_markes_rna_clusters.csv"),
row.names = 1)
DefaultAssay(stoc_data) <- seurat_assay
top10_rna <- marker_genes_rna %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC)
rna_heatmap <- DoHeatmap(stoc_data, features = top10_rna$gene) + NoLegend()
DefaultAssay(stoc_data) <- "ADT"
top10_adt <- marker_genes_adt %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC)
adt_heatmap <- DoHeatmap(stoc_data, features = top10_adt$gene) + NoLegend()
plot_grid(rna_heatmap, adt_heatmap,
labels = c("A", "B"),
nrow = 2, ncol = 1,
align = "hv",
axis = "tb")Here are ADT and gene expression markers from the diminsionality reduction using both genes and surface protein A. RNA markers B. ADT markers
Idents(stoc_data) <- "combined_cluster"
#marker_genes_rna <- FindAllMarkers(stoc_data, assay = seurat_assay)
#write.csv(marker_genes_rna, file = paste0(save_dir,
# "files/rna_markes_combined_clusters.csv"))
marker_genes_rna <- read.csv(paste0(save_dir,
"files/rna_markes_combined_clusters.csv"),
row.names = 1)
#marker_genes_adt <- FindAllMarkers(stoc_data, assay = "ADT")
#write.csv(marker_genes_adt, file = paste0(save_dir, "files/adt_markes_combined_clusters.csv"))
marker_genes_adt <- read.csv(paste0(save_dir, "files/adt_markes_combined_clusters.csv"))
DefaultAssay(stoc_data) <- seurat_assay
top10_rna <- marker_genes_rna %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC)
rna_heatmap <- DoHeatmap(stoc_data, features = top10_rna$gene) + NoLegend()
DefaultAssay(stoc_data) <- "ADT"
top10_adt <- marker_genes_adt %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC)
adt_heatmap <- DoHeatmap(stoc_data, features = top10_adt$gene) + NoLegend()
plot_grid(rna_heatmap, adt_heatmap,
labels = c("A", "B"),
nrow = 2, ncol = 1,
align = "hv",
axis = "tb")UMAPS and violin plots for different genes of interest A. Violin plots across clusters B. Violin plots across cell types C. UMAP of gene expression
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
KRT17 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
KRT17 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
STMN1 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
STMN1 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
IFITM2 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
IFITM2 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
TPSAB1 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
TPSAB1 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
CPA3 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
CPA3 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
TPSB2 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
TPSB2 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
GATA2 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
GATA2 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
TPSD1 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
TPSD1 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
HPGDS 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
HPGDS 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
HDC 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
HDC 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
LYZ 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
LYZ 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
TOP2A 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
TOP2A 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
MKI67 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
MKI67 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
KRT1 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
KRT1 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
PDGFRA 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
PDGFRA 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "RNA"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
KIT 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
KIT 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "ADT"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD205 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD205 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "ADT"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD326 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD326 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "ADT"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD140a 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD140a 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "ADT"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD52 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD52 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "ADT"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD8A 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD8A 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
DefaultAssay(stoc_data) <- "ADT"
violin1 <- featDistPlot(stoc_data, gene, sep_by = "combined_cluster")
violin2 <- featDistPlot(stoc_data, gene, sep_by = "combined_celltype",
color = colors)
umap1 <- plotDimRed(stoc_data, col_by = gene, plot_type = "wnn.umap")[[1]]
plot_grid(violin1, violin2, umap1,
labels = c("A", "B", "C"),
nrow = 3,
ncol = 1,
align = "hv",
axis = "tb")TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD4 1 (1-1,1-1) arrange gtable[layout]
TableGrob (1 x 1) "arrange": 1 grobs
z cells name grob
ADT-CD4 1 (1-1,1-1) arrange gtable[layout]
quartz_off_screen
2
# Find markers from exisiting dataset
reference_markers <- read.csv("files/thymus_annotated_matrix_files/top_markers_of_populations.csv",
header = TRUE)
reference_markers$X <- NULL
ident_cell_types <- unique(stoc_data$combined_celltype)
ident_ref_markers <- reference_markers %>%
select(ident_cell_types)
nmarkers <- ncol(ident_ref_markers) * nrow(ident_ref_markers)
desired_markers <- 40
ident_ref_markers <- ident_ref_markers %>%
top_frac(desired_markers/nmarkers) %>%
gather(celltype, gene, colnames(ident_ref_markers))
marker_genes_ref <- unique(ident_ref_markers$gene)
marker_genes_internet <- c(
"KIT", # Mast cell marker from: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3999759/
"IL1RL1", # Mast cell marker
"FCER1A", # Mast cell marker
"MS4A2", # Mast cell marker
"ENPP3", # Mast cell marker
"HDC", # Mast cell marker
"TPSAB1", # Mast cell marker
"TPSB2", # Mast cell marker
"TPSD1", # Mast cell marker
"CMA1", # Mast cell marker
"CPA3", # Mast cell marker
"CTSG", # Mast cell marker
"HPGDS", # Mast cell marker
"LTC4S", # Mast cell marker
"AIRE", # mTEC marker
"KRT1", # Late mTEC marker
"KRT10", # Late mTEC marker
"FEZF2", # mTEC marker
"FOXN1", # mTEC marker
"TOP2A", # cycling marker
"MKI67", # cycling marker
"STMN1", # cycling marker
"S100A4", # Fibroblast marker
"COL3A1", # Fibroblast marker
"MGP" # Fibroblast marker
)
marker_adt_internet <- c(
"ADT-CD205", # Marker of cTECs and DCs, Ly75
"ADT-CD326", # EPCAM marker of TECs
"ADT-CD140a", # Mesenchimal cells
"ADT-CD3D", # T cells
"ADT-CD4", # T cells
"ADT-CD8", # T cells
"ADT-CD86", # Expressed on DCs among others
"ADT-CD2", # expressed on NK and T cells
"ADT-CD40", # on apcs
"ADT-CD7", # thymocytes and mature t cells
"ADT-CD14", # macrophages
"ADT-CD226", # NK and CD8 cells
"ADT-CLEC4C", # plasmacytoid dendritic cells
"ADT-CLEC12A", # Many including moncyptes and dendritic cells
"ADT-C5AR1", # seems to be on mast cells
"ADT-SIGLEC1" # macrophages
)Here I took the top genes defining each cell type of the reference (downloaded from their supplementary table) and plotted for the cell types identified to be in this data
stoc_data$combined_cluster_ord <- factor(stoc_data$combined_cluster,
levels = c(2, 8, 6, 0, 1, 3, 4, 5, 7))
stoc_data$combined_celltype <- factor(stoc_data$combined_celltype,
levels = c("Mast", "DC2", "mTEC.III.",
"Fb_cycling"))
dotplot_cluster <- DotPlot(stoc_data,
assay = "RNA",
features = marker_genes_ref,
group.by = "combined_cluster_ord")
dotplot_cluster <- dotplot_cluster +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
dotplot_celltype <- DotPlot(stoc_data,
assay = "RNA",
features = marker_genes_ref,
group.by = "combined_celltype")
dotplot_celltype <- dotplot_celltype +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_grid(dotplot_cluster, dotplot_celltype,
nrow = 2, ncol = 1,
align = "v",
axis = "tb")Here I did a rough internet search looking for markers associated with the identified cell types
dotplot_cluster <- DotPlot(stoc_data,
assay = "RNA",
features = marker_genes_internet,
group.by = "combined_cluster_ord")
dotplot_cluster <- dotplot_cluster +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
dotplot_celltype <- DotPlot(stoc_data,
assay = "RNA",
features = marker_genes_internet,
group.by = "combined_celltype")
dotplot_celltype <- dotplot_celltype +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_grid(dotplot_cluster, dotplot_celltype,
nrow = 2, ncol = 1,
align = "v",
axis = "tb")Here I did a rough internet search looking for markers associated with the identified cell types
dotplot_cluster <- DotPlot(stoc_data,
assay = "ADT",
features = marker_adt_internet,
group.by = "combined_cluster_ord")
dotplot_cluster <- dotplot_cluster +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
dotplot_celltype <- DotPlot(stoc_data,
assay = "ADT",
features = marker_adt_internet,
group.by = "combined_celltype")
dotplot_celltype <- dotplot_celltype +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_grid(dotplot_cluster, dotplot_celltype,
nrow = 2, ncol = 1,
align = "v",
axis = "tb")